import os
import sys
import pandas as pd
import plotly.express as px

# Move two directories up so the relative "data/..." paths used below resolve
# from the project root (this notebook lives two levels deep).
# NOTE: the duplicate `import os` that followed the plotly import was removed;
# os is already imported on the first line.
os.chdir("../..")
os.getcwd()
'/Volumes/GoogleDrive/My Drive/Computer Backups/Rahul Yerrabelli drive/PythonProjects/GeospatialAnalysis'
import geopandas
import rioxarray # Surface data manipulation
import xarray # Surface data manipulation
from pysal.explore import esda # Exploratory Spatial analytics
from pysal.lib import weights # Spatial weights
import contextily # Background tiles
OMP: Info #276: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.
/Users/ryerrabelli/.conda/envs/GeospatialAnalysis/lib/python3.8/site-packages/spaghetti/network.py:36: FutureWarning: The next major release of pysal/spaghetti (2.0.0) will drop support for all ``libpysal.cg`` geometries. This change is a first step in refactoring ``spaghetti`` that is expected to result in dramatically reduced runtimes for network instantiation and operations. Users currently requiring network and point pattern input as ``libpysal.cg`` geometries should prepare for this simply by converting to ``shapely`` geometries.
warnings.warn(f"{dep_msg}", FutureWarning)
# FIPS-to-county crosswalk (one row per county; all columns kept as strings),
# ordered by the 5-digit county FIPS code.
fips2county = (
    pd.read_csv("data/fips2county.tsv", sep="\t", comment='#', dtype=str)
      .sort_values(by="CountyFIPS")
)
# Local copy of plotly's US county-boundary GeoJSON, ordered by county id (FIPS).
fips2countygeo = (
    geopandas.read_file("data/plotly_usa_geojson-counties-fips.json")
             .sort_values(by="id")
)
#fips2countygeo = geopandas.read_file("https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json")
# Same county boundaries fetched directly from the plotly datasets repo.
countiesgeo = geopandas.read_file("https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json").sort_values(by="id")
from urllib.request import urlopen
import json

# Also download the raw GeoJSON as a plain dict — px.choropleth accepts it
# directly via its `geojson=` argument.
with urlopen('https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json') as response:
    counties = json.load(response)
#counties["features"][0]["id"]

# Keep a shallow copy whose feature list is ordered by county FIPS id,
# then make that the working version.
counties2 = counties.copy()
counties2["features"] = sorted(counties2["features"], key=lambda feat: feat['id'])
counties = counties2
# NOTE: the redundant `import pandas as pd` that was here has been removed —
# pandas is already imported at the top of the file.
# The ent CSV file only contains the counties which are analyzable.
df_orig = pd.read_csv("data/2022_04_10 ent initial output.csv", dtype={"FIPS": str}).sort_values(by="FIPS")
# Merge with the fips-to-county standard data set.
df_wide = pd.merge(left=df_orig, right=fips2county, how="left", left_on='FIPS', right_on='CountyFIPS')
# Insert a "County, ST" column (e.g. "Freehold, NJ" or "Chicago, IL") right
# after FIPS for readability.
df_wide.insert(1, "County_St", df_wide["CountyName"].astype(str) + ", " + df_wide["StateAbbr"].astype(str))
# Display with all the columns visible (display() is the notebook builtin).
with pd.option_context('display.max_rows', 3, 'display.max_columns', None):
    display(df_wide)
# Minimal column set used for the simplified choropleth / Moran analysis below.
loc_simple = ["FIPS", "CountyName", "StateAbbr", "% ASC Billing", "Moran I score for ACS billing fraction"]
df_wide_simple = df_wide[loc_simple]
# Wider column set kept for reference / ad-hoc inspection.
# (Commented-out exploratory merge code that used it was removed.)
loc_main = ["FIPS", "County", "StateFIPS", "Total Medicare Payment Amount", "% ASC Procedures", "% ASC Billing", "CountyFIPS_3", "CountyName", "StateName", "CountyFIPS", "StateAbbr", "STATE_COUNTY"]
| FIPS | County_St | Total Number of Services | Total Medicare Payment Amount | Total Number of Services: 2019 | Total Medicare Payment Amount: 2019 | Total Number of Services: 2018 | Total Medicare Payment Amount: 2018 | Total Number of Services: 2017 | Total Medicare Payment Amount: 2017 | Total Number of Services: 2016 | Total Medicare Payment Amount: 2016 | Total Number of Services: 2015 | Total Medicare Payment Amount: 2015 | tot_ratio | % ASC Procedures: 2019 | % ASC Billing: 2019 | % ASC Procedures: 2018 | % ASC Billing: 2018 | % ASC Procedures: 2017 | % ASC Billing: 2017 | % ASC Procedures: 2016 | % ASC Billing: 2016 | % ASC Procedures: 2015 | % ASC Billing: 2015 | % ASC Procedures | % ASC Billing | Beneficiaries with Part A and Part B | Average Age | Percent Male | Percent Non-Hispanic White | Percent African American | Percent Hispanic | Percent Eligible for Medicaid | Average HCC Score | Hospital Readmission Rate | Emergency Department Visits per 1000 Beneficiaries | Procedures Per Capita Standardized Costs | Procedure Events Per 1000 Beneficiaries | metro | pct_poverty | median_house_income | pop | 2013_Rural_urban_cont_code | Pct_wthout_high_diploma | Pct_wth_high_diploma | Pct_wth_some_coll | Pct_wth_coll_degree | unemployment | pct_uninsured | fibro | tabacco | obesity | migrane | Alzheimers | Depression | Alcohol Abuse | Drug Abuse | Schizo_othr_psych | COPD | Chronic Kidney Disease | Osteoporosis | Stroke | Diabetes | Asthma | Arthritis | Hypertension | Heart Failure | Ischemic Heart Disease | Population Density | Medicare Population Density | Moran I score for ACS billing fraction | County | StateFIPS | CountyFIPS_3 | CountyName | StateName | CountyFIPS | StateAbbr | STATE_COUNTY | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 01003 | Baldwin, AL | 524.0 | 29853.60 | 50.0 | 4107.68 | 238.0 | 12862.19 | 236.0 | 12883.73 | 0.0 | 0.00 | 0.0 | 0.00 | 15.0 | 0.00000 | 0.000000 | 0.0 | 0.0 | 0.000000 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000000 | 0.000000 | 47450.4 | 72.2 | 46.114 | 92.722 | 4.384 | 1.06 | 8.484 | 0.920 | 15.616 | 565.6 | 811.670 | 7523.2 | 1 | 10.92 | 56582.6 | 212830.0 | 3.0 | 9.2 | 27.7 | 31.3 | 31.9 | 4.32 | 11.52 | 19.8 | 8.4 | 20.8 | 3.0 | 10.82 | 15.90 | 1.775 | 3.150 | 2.14 | 11.18 | 20.40 | 6.20 | 3.86 | 23.58 | 4.76 | 37.42 | 60.28 | 12.38 | 32.04 | 133.873533 | 29.847074 | Low-High | Baldwin | 01 | 003 | Baldwin | Alabama | 01003 | AL | AL | BALDWIN |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 940 | 56041 | Uinta, WY | 347.0 | 92886.86 | 179.0 | 65588.66 | 34.0 | 5826.04 | 47.0 | 9214.44 | 51.0 | 9390.43 | 36.0 | 2867.29 | 17.0 | 43.01676 | 72.010649 | 0.0 | 0.0 | 25.531915 | 72.933569 | 0.0 | 0.0 | 0.0 | 0.0 | 25.648415 | 58.082747 | 2939.2 | 70.0 | 48.514 | 0.000 | 0.000 | 0.00 | 14.766 | 0.818 | 12.368 | 579.0 | 529.996 | 3911.8 | 0 | 9.82 | 65848.4 | 20478.8 | 7.0 | 7.3 | 41.5 | 35.2 | 16.0 | 4.74 | 12.90 | 16.0 | 9.4 | 11.4 | 2.2 | 7.08 | 16.22 | 2.200 | 2.675 | 3.26 | 11.08 | 15.58 | 2.96 | 2.22 | 23.26 | 3.72 | 24.48 | 39.52 | 11.28 | 22.38 | 9.839597 | 1.412219 | Non Significant | Uinta | 56 | 041 | Uinta | Wyoming | 56041 | WY | WY | UINTA |
941 rows × 80 columns
cols_to_keep = ["FIPS", "County_St"]
col_categories = ["Total Number of Services:", "Total Medicare Payment Amount:", "% ASC Procedures:", "% ASC Billing:"]
years = list(range(2015, 2019 + 1))

# Melt each category's per-year columns into its own long-format frame.
df_longs = []
for col_category in col_categories:
    year_cols = [f"{col_category} {year}" for year in years]
    melted = df_wide.melt(
        id_vars=cols_to_keep,
        var_name="Year",
        value_vars=year_cols,
        value_name=f"{col_category} in Year",
    )
    # Strip the category prefix so "Year" holds just the 4-digit year string.
    melted["Year"] = melted["Year"].replace({f"{col_category} {year}": f"{year}" for year in years})
    df_longs.append(melted)

# Merge the per-category long frames on county + year.
df_long = df_longs[0]
for other in df_longs[1:]:
    df_long = pd.merge(left=df_long, right=other, how="outer", on=(cols_to_keep + ["Year"]))

# Re-attach the non-per-year columns from the wide frame (dropping the
# per-year columns so they are not duplicated).
per_year_cols = [f"{col_category} {year}" for year in years for col_category in col_categories]
df_long = pd.merge(
    left=df_long,
    right=df_wide.drop(per_year_cols, axis=1),
    how="left", on=cols_to_keep,
)
display(df_long)
| FIPS | County_St | Year | Total Number of Services: in Year | Total Medicare Payment Amount: in Year | % ASC Procedures: in Year | % ASC Billing: in Year | Total Number of Services | Total Medicare Payment Amount | tot_ratio | ... | Medicare Population Density | Moran I score for ACS billing fraction | County | StateFIPS | CountyFIPS_3 | CountyName | StateName | CountyFIPS | StateAbbr | STATE_COUNTY | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 01017 | Chambers, AL | 2015 | 0.0 | 0.00 | 0.000000 | 0.000000 | 408.0 | 30064.800000 | 14.196990 | ... | 14.230610 | Non Significant | Chambers | 01 | 017 | Chambers | Alabama | 01017 | AL | AL | CHAMBERS |
| 1 | 01033 | Colbert, AL | 2015 | 108.0 | 10404.39 | 0.000000 | 0.000000 | 272.0 | 37080.230000 | 16.000000 | ... | 22.681014 | Non Significant | Colbert | 01 | 033 | Colbert | Alabama | 01033 | AL | AL | COLBERT |
| 2 | 01045 | Dale, AL | 2015 | 0.0 | 0.00 | 0.000000 | 0.000000 | 12.0 | 405.210000 | 0.999104 | ... | 17.700437 | Non Significant | Dale | 01 | 045 | Dale | Alabama | 01045 | AL | AL | DALE |
| 3 | 01083 | Limestone, AL | 2015 | 0.0 | 0.00 | 0.000000 | 0.000000 | 55.0 | 9515.590000 | 4.000000 | ... | 29.157261 | Non Significant | Limestone | 01 | 083 | Limestone | Alabama | 01083 | AL | AL | LIMESTONE |
| 4 | 05145 | White, AR | 2015 | 1217.0 | 48412.57 | 0.000000 | 0.000000 | 1269.0 | 52190.220000 | 11.995594 | ... | 15.224018 | Non Significant | White | 05 | 145 | White | Arkansas | 05145 | AR | AR | WHITE |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 4700 | 21073 | Franklin, KY | 2019 | 0.0 | 0.00 | 0.000000 | 0.000000 | 114.0 | 7749.960000 | 3.910144 | ... | 62.858188 | Non Significant | Franklin | 21 | 073 | Franklin | Kentucky | 21073 | KY | KY | FRANKLIN |
| 4701 | 56021 | Laramie, WY | 2019 | 422.0 | 79083.86 | 100.000000 | 100.000000 | 1784.0 | 337949.890001 | 21.000000 | ... | 6.286729 | Non Significant | Laramie | 56 | 021 | Laramie | Wyoming | 56021 | WY | WY | LARAMIE |
| 4702 | 54041 | Lewis, WV | 2019 | 0.0 | 0.00 | 0.000000 | 0.000000 | 606.0 | 26648.230000 | 4.000000 | ... | 10.524948 | Low-Low | Lewis | 54 | 041 | Lewis | West Virginia | 54041 | WV | WV | LEWIS |
| 4703 | 50027 | Windsor, VT | 2019 | 319.0 | 12093.61 | 0.000000 | 0.000000 | 1132.0 | 47825.130000 | 35.000000 | ... | 14.320922 | Low-Low | Windsor | 50 | 027 | Windsor | Vermont | 50027 | VT | VT | WINDSOR |
| 4704 | 51041 | Chesterfield, VA | 2019 | 111.0 | 47135.49 | 28.828829 | 51.859904 | 617.0 | 204885.309999 | 34.339788 | ... | 124.679835 | Non Significant | Chesterfield | 51 | 041 | Chesterfield | Virginia | 51041 | VA | VA | CHESTERFIELD |
4705 rows × 65 columns
# Attach county geometries to the wide tables. A right-merge keeps every
# county present in the data (even if a geometry is missing), and the result
# is indexed by the 5-digit FIPS code.
df_wide_geom = (
    pd.merge(left=countiesgeo, right=df_wide, how="right", left_on='id', right_on='FIPS')
      .set_index("FIPS")
      .sort_index()
)
df_wide_simple_geom = (
    pd.merge(left=countiesgeo, right=df_wide_simple, how="right", left_on='id', right_on='FIPS')
      .set_index("FIPS")
      .sort_index()
)
# Choropleth of the 2019 % ASC Procedures by county, drawn against the raw
# GeoJSON dict; counties are matched to features by FIPS index.
fig = px.choropleth(df_wide_geom, geojson=counties2, locations=df_wide_geom.index,
                    color='% ASC Procedures: 2019',
                    color_continuous_scale="Viridis",
                    #range_color=(0, 12),
                    scope="usa",
                    #facet_col="Moran I score for ACS billing fraction",
                    labels={
                        "2013_Rural_urban_cont_code": "2013-RUCA",
                        "pop": "Pop.",
                        "Average Age": "Mean Age",
                        "Percent Male": "% M",
                        "tot_ratio": "Tot. Ratio",
                    },
                    )
# Hover, legend, margin, and title styling consolidated into one layout update
# (was three separate update_layout calls).
fig.update_layout(
    hoverlabel=dict(
        bgcolor="white",
        font_size=16,
        font_family="Rockwell",
        align="auto",
    ),
    # NOTE(review): a continuous choropleth renders a colorbar rather than a
    # legend, so this placement only takes effect if discrete traces are added.
    legend=dict(
        orientation="h",
        yanchor="bottom",
        y=1.02,
        xanchor="right",
        x=1,
    ),
    margin={"r": 0, "t": 0, "l": 0, "b": 0},
    title={
        'text': "% ASC Procedures 2019",  # was an f-string with no placeholders
        'y': 0.95,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top',
    },
)
fig.show()
#save_figure(fig,"choropleth-total")
# Choropleth of overall % ASC Billing, drawn from the GeoDataFrame's own
# geometry column (rather than the raw GeoJSON dict used above).
fig = px.choropleth(df_wide_simple_geom, geojson=df_wide_simple_geom.geometry, locations=df_wide_simple_geom.index,
                    color='% ASC Billing',
                    color_continuous_scale="Viridis",
                    #range_color=(0, 12),
                    scope="usa",
                    #facet_col="Moran I score for ACS billing fraction",
                    labels={
                        "2013_Rural_urban_cont_code": "2013-RUCA",
                        "pop": "Pop.",
                        "Average Age": "Mean Age",
                        "Percent Male": "% M",
                        "tot_ratio": "Tot. Ratio",
                    },
                    )
# Hover, legend, margin, and title styling consolidated into one layout update
# (was three separate update_layout calls).
fig.update_layout(
    hoverlabel=dict(
        bgcolor="white",
        font_size=16,
        font_family="Rockwell",
        align="auto",
    ),
    # NOTE(review): a continuous choropleth renders a colorbar rather than a
    # legend, so this placement only takes effect if discrete traces are added.
    legend=dict(
        orientation="h",
        yanchor="bottom",
        y=1.02,
        xanchor="right",
        x=1,
    ),
    margin={"r": 0, "t": 0, "l": 0, "b": 0},
    title={
        'text': "% ASC Billing",  # was an f-string with no placeholders
        'y': 0.95,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top',
    },
)
fig.show()
#save_figure(fig,"choropleth-total")
# Spatial weights: link each county to its 8 nearest neighbours.
w = weights.KNN.from_dataframe(df_wide_simple_geom, k=8)
# Row-standardize so each county's weights sum to 1.
w.transform = 'R'

# Spatial lag of % ASC Billing: with row-standardized weights this is the
# weighted average of each county's neighbours' values.
db = df_wide_simple_geom.copy()
db['% ASC Billing_lag'] = weights.spatial_lag.lag_spatial(w, db['% ASC Billing'])
# Map the spatially lagged billing fraction.
fig = px.choropleth(
    db,
    geojson=db.geometry,
    locations=db.index,
    color='% ASC Billing_lag',
    color_continuous_scale="Viridis",
    #range_color=(0, 12),
    scope="usa",
)
fig.show()
#save_figure(fig,"choropleth-total")
# Global Moran's I for % ASC Billing (permutation-based pseudo p-value).
# NOTE: the redundant `w.transform = 'R'` that was here has been removed —
# the weights were already row-standardized above.
moran = esda.moran.Moran(db['% ASC Billing'], w)
moran.I, moran.p_sim
# Example notebook output: (0.12196058782012296, 0.001)
# Local Moran's I (LISA) — one statistic per county.
lisa = esda.moran.Moran_Local(db['% ASC Billing'], w)
import seaborn
# Draw KDE line showing the distribution of local Moran's I values
# across counties (lisa.Is is one statistic per observation).
ax = seaborn.kdeplot(lisa.Is)
# Add one small bar (rug) for each observation
# along horizontal axis
seaborn.rugplot(lisa.Is, ax=ax);
from splot import esda as esdaplot
import matplotlib.pyplot as plt # Graphics
from matplotlib import colors
# Set up a 2x2 panel of maps.
f, axs = plt.subplots(nrows=2, ncols=2, figsize=(12, 12))
# Flatten so panels can be addressed with a single index.
axs = axs.flatten()

# Subplot 1: choropleth of the local Moran's I statistics, classed into
# quintiles. The statistic is attached to `db` on the fly via assign().
ax = axs[0]
db.assign(Is=lisa.Is).plot(
    column='Is',
    cmap='plasma',
    scheme='quantiles',
    k=5,
    edgecolor='white',
    linewidth=0.1,
    alpha=0.75,
    legend=True,
    ax=ax,
)
# Subplot 2: LISA quadrant categories for every county.
ax = axs[1]
# Setting the significance level to 1 "tricks" the function into treating
# every observation as significant, so each polygon gets its quadrant color.
esdaplot.lisa_cluster(lisa, db, p=1, ax=ax)
# Subplot 3: binary significance map at the 5% pseudo p-value level.
ax = axs[2]
# Flag each county (1 = significant, 0 = not), indexed like the data, then
# recode the flags to human-readable labels.
sig_flags = pd.Series(1 * (lisa.p_sim < 0.05), index=db.index)
sig_labels = sig_flags.map({1: 'Significant', 0: 'Non-Significant'})
# Attach the labels to `db` on the fly and plot the categorical map.
db.assign(cl=sig_labels).plot(
    column='cl',
    categorical=True,
    k=2,
    cmap='Paired',
    linewidth=0.1,
    edgecolor='white',
    legend=True,
    ax=ax,
)
# Subplot 4 #
# Cluster map
# Grab fourth axis of local statistics
ax = axs[3]
# Plot quadrant colors. In this case, we use a 5% significance
# level to select polygons as part of statistically significant
# clusters
esdaplot.lisa_cluster(lisa, db, p=0.05, ax=ax);
# Figure styling #
# Title each panel and hide axis frames for map aesthetics.
# `axs` was already flattened above, so the redundant second .flatten()
# (and the index-based title lookup) were removed in favour of zip().
panel_titles = [
    'Local Statistics',
    'Scatterplot Quadrant',
    'Statistical Significance',
    'Moran Cluster Map',
]
for ax, panel_title in zip(axs, panel_titles):
    ax.set_axis_off()
    ax.set_title(panel_title, y=0)
# Tight layout to minimise in-between white space
f.tight_layout()
# Display the figure
plt.show()